package org.apache.nutch.indexer.mongodb;
import java.io.IOException;
import java.util.Date;
import java.util.Map.Entry;
import org.apache.hadoop.mapred.JobConf;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.indexer.NutchField;
import org.apache.nutch.indexer.NutchIndexWriter;
import org.apache.solr.common.util.DateUtil;
import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.Mongo;
public class MongodbWriter implements NutchIndexWriter{
private Mongo mongo;
@Override
public void open(JobConf job, String name) throws IOException {
mongo = new Mongo(job.get(MongodbConstants.SERVER_URL));
}
@Override
public void write(NutchDocument doc) throws IOException {
// Connect to a mongodb database
DB db = mongo.getDB( "nutch" );
DBCollection col = db.getCollection("index");
// Setup the mongodb db object
BasicDBObject mongoDoc = new BasicDBObject();
for(final Entry<String, NutchField> e : doc) {
for (final Object val : e.getValue().getValues()) {
String key;
// normalise the string representation for a Date
Object val2 = val;
if (val instanceof Date){
key = e.getKey();
val2 = DateUtil.getThreadLocalDateFormat().format(val);
mongoDoc.put(key, val2);
} else {
key = e.getKey();
mongoDoc.put(key, val);
}
}
}
// insert the document into mongodb
col.insert(mongoDoc);
}
@Override
public void close() throws IOException {
if ( mongo != null ) {
mongo.close();
mongo = null;
}
}
}